# coding=utf-8
import scrapy
from yu_spider.items import *
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors import LinkExtractor


class DyskSpider(CrawlSpider):
    """Crawl spider that scrapes video pages reachable from a Youku channel.

    Crawling begins at the URLs in ``start_urls``. Every link matching the
    ``v.youku.com/v_show/...html?from=...`` pattern is followed and handed to
    :meth:`parse_item`, which builds one ``YuSpiderItem`` per page.
    """

    name = 'dysk'
    allowed_domains = ['youku.com']
    start_urls = [
        'http://i.youku.com/u/UMTg5NDkzMDM3Ng==/videos',
        'http://i.youku.com/u/UMTg5NDkzMDM3Ng==/videos/fun_ajaxload/?__rt=1&__ro=&v_page=1&page_num=2&page_order=1&q=&last_str=',
    ]

    rules = (
        # Raw string so the regex escapes (``\.``, ``\?``) reach the regex
        # engine untouched instead of being invalid string escapes.
        Rule(LinkExtractor(allow=(r'http://v\.youku\.com/v_show/.*?\.html\?from=.*?', )),
             follow=True, callback='parse_item'),
    )

    @staticmethod
    def _first(sel, query):
        """Return the first string matched by XPath ``query``, or ``None``.

        Indexing ``extract()[0]`` directly raises ``IndexError`` when the
        node is absent, which would abort the callback and drop the item.
        """
        matches = sel.xpath(query).extract()
        return matches[0] if matches else None

    def parse_item(self, response):
        """Build a ``YuSpiderItem`` from a followed page.

        :param response: the downloaded page passed in by the crawl rule.
        :returns: a one-element list holding the item. ``title``, ``link``
            and ``desc`` are each the first text/value matched by their
            XPath, or ``None`` when the page has no matching node.
        """
        sel = scrapy.Selector(response)
        item = YuSpiderItem()
        item['title'] = self._first(sel, '//h1[@class="title"]/text()')
        item['link'] = self._first(sel, '//input[@id="link4"]/@value')
        item['desc'] = self._first(sel, '//div[@id="text_long"]/text()')
        return [item]
